home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Business Master (3rd Edition)
/
The Business Master (3rd Edition).iso
/
files
/
wordmisc
/
pcspell
/
speller2.pa4
< prev
next >
Wrap
Text File
|
1988-03-29
|
13KB
|
350 lines
{$N-} {No numeric coprocessor}
{$S-}
{$V-}
PROGRAM SPELLER2; { SPELL CHECKER -- with cmd line }
{ This spell checker is based on the ideas contained in PC-SPELL ver
1.15 in BASIC by Andy Wildenberg. In that program the text file is
read into memory and put into a list of words in a string array. The
string array is then sorted and the unique words removed into
another array. Thus a unique word array is formed which is in
alphabetical order. This word list is then compared to a dictionary
file which is an ASCII list of legal words also in alphabetical
order. If the word is not found then it is placed into a file of
possible misspelled words on disk. The user is then responsible for
printing the list of misspelled words and using a global change
feature in a word processor to find and replace the words with the
correct spelling.
This spell checker works in much the same way except that a unique
word file is formed in an array alphabetically as the text file is
parsed into words. The rest of the process is about the same.
To use, just type the name of the program followed by a parameter
specifying the file. The parameter is optional and if ommitted then the
program will request this name.
Version SPELLER2 is compatible with WINDOWS facilities and adds a alternate
dictionary file as an optional second parameter on the command line. This
version has been converted to compile with Turbo Pascal version 4.x.
J. Leeson, March 29, 1988
}
{ *************************************************************************}
CONST
WORDSIZE : integer = 16;
TYPE
STRPARAM = string;
WORDTYP = string [16];
WORDPTR = ^WORDTYP;
PTRARRAY = array [0..4000] of WORDPTR; {Limited to 8191 because the
Move function requires an integer parameter
for length in bytes of data to move. SPELLERW
reduced to 4000 to reduce memory requirements.}
VAR
SRCNAME : string; { Name of source file to spell check }
ALTDICNAME : string; { Name of the alternate dictionary file }
OPPATH : string; { DOS path for speller files }
OPNAME : string; { DOS name for speller files }
OUTNAME : string; { Name of output file ( default srcfile.MIS) }
DOCWORDCNT, UNIQUECNT, MISSPELLCNT : integer;
I : integer;
WORDINDX : PTRARRAY;
A_WORD, ALTWORD, TEMP1 : WORDTYP;
PREFIX : string [1];
MATCH, ALTDIC : boolean;
SRCFILE, DICFILE, ALTDICFILE, MISSFILE : text;
ABuf, SBuf : array[0..$fff] of char; {buffers for source and altdic}
DBuf : array[0..$1fff] of char; {buffer for dictionary file}
x : byte;
PATHSTRING : string; { working storage for path strings }
FUNCTION LOWCASE (var A : char) : boolean;
{ *************************************************************************}
{ LOWCASE modifies the character parameter "A" to make it a lower case
alpha character if it is an upper case alpha. If the character
parameter is alpha ('a'..'z' or 'A'..'Z') then the function returns
TRUE else it returns FALSE. }
{ *************************************************************************}
var x : byte;
begin
x := ord (A);
if (x>96) and (x<123) then LOWCASE := true
else begin
if (x>64) and (x<91) then
begin
A := chr (x+32);
LOWCASE := true;
end
else LOWCASE := false;
end;
end; { of LOWCASE }
PROCEDURE GETWORD (var FILNAME : text; var A_WORD : WORDTYP);
{ *************************************************************************}
{GETWORD version 1.2. Defines the start of a word as the next alpha
character in the file. A word is formed by adding characters until a
non-alpha character is found. Contractions are accepted as identified by
a single quote followed by an alpha character occuring after the SOW.
Upper case letters are converted to lower case.}
{ *************************************************************************}
VAR
CH, CH2 : char;
SOW : boolean;
{Global WORDSIZE = maximum word length value.}
begin
SOW := false;
A_WORD := '';
repeat
read (FILNAME, CH);
if LOWCASE (CH) then SOW := true
until SOW or eof (FILNAME);
if SOW then
begin
A_WORD := CH;
repeat
read (FILNAME, CH);
if LOWCASE (CH) then
begin
if Length (A_WORD) < WORDSIZE then A_WORD := A_WORD + CH
else SOW := false;
end
else begin
if CH <> '''' then SOW := false
else begin
if not Eof (FILNAME) then
begin
Read (FILNAME, CH2);
if LOWCASE (CH2) then
begin
if Length (A_WORD) < WORDSIZE-1 then
A_WORD := A_WORD + CH + CH2 else SOW := false;
end
else SOW := false;
end;
end;
end;
until (not SOW) or eof (FILNAME);
end;
end; { of GETWORD }
procedure ADDUNIQUE (var LIST : PTRARRAY; A_WORD : WORDTYP; var TOP : integer);
{ ***************************************************************************}
{ This procedure does a binary search of the LIST looking for the location
where A_WORD belongs. Once it finds the place, if A_WORD is there then it
exits. If not, then it moves the list up by one pointer and puts the new
word there.}
{ ***************************************************************************}
var
SEARCH : boolean;
MID, LOW, HIGH, COUNT : integer;
begin
SEARCH := true;
LOW := 0; MID := Trunc (TOP/2); HIGH := TOP;
while SEARCH do {** Find the place where A_WORD belongs. **}
begin
if MID = LOW then SEARCH := false
else begin
if A_WORD < LIST [MID]^ then HIGH := MID
else LOW := MID; {** A_WORD is >= word at LIST [MID]^ **}
MID := LOW + Trunc ((HIGH-LOW)/2);
end;
end; {** of SEARCH. MID is at the location containing A_WORD or else
A_WORD goes at the location after MID. **}
if A_WORD <> LIST [MID]^ then begin
COUNT := 4*(TOP-MID);
MID := MID+1;
Move (LIST [MID], LIST [MID+1], COUNT);
TOP := TOP+1;
new (LIST [MID]);
LIST [MID]^ := A_WORD;
end;
end;
Function DosPath : string;
{ **************************************************************************}
{ This function extracts the 'PATH =' string from the DOS environment passed
by DOS to the applications program and returns the string, else returns nul.
Restructured for 4.0 Turbo Pascal.}
{ **************************************************************************}
type
AThing = ^EnvThing;
EnvThing = array[1..255] of char; {It's a buncha ASCIIZ strings}
var
I : word;
X : integer;
DosEnvSeg : word;
DosEnvPtr : AThing;
DosEnv : EnvThing;
PathString, EnvString : string;
begin
DosEnvSeg := MemW[PrefixSeg:$002c]; {Segment passed by DOS is here}
I := 0;
PathString := '';
repeat {DOS always passes a COMSPEC= environment string}
DosEnvPtr := Ptr(DosEnvSeg,I);
EnvString := DosEnvPtr^;
length(EnvString) := pos(chr(0), EnvString)-1; {ASCIIZ strings}
{If two consecutive zero bytes then length(EnvString) will be zero}
I := I+length(EnvString)+1; {Moves the pointer to next string}
X := Pos('PATH=',EnvString);
if X <> 0 then PathString :=
copy(EnvString, X+5, length(EnvString)-(X+4));
until (PathString <> '') or (EnvString = ''); {Two zero bytes end it}
DosPath := PathString;
end;
Function ParsePath (Var PATHSTRING : STRPARAM) : string;
{ ***************************************************************************}
{ This function returns the first substring of PATHSTRING which is terminated
by the end of the string or by a semicolon. It then alters the input variable
PATHSTRING to remove this part of the string. Thus subsequent calls to
ParsePath will return one part of the parameter string until it is all gone
and will then return a nul string. }
{ ***************************************************************************}
var
x : integer;
begin
if length (PATHSTRING) = 0 then ParsePath := '' else begin
x := Pos (';',PATHSTRING);
if x=0 then begin
ParsePath := PATHSTRING;
PATHSTRING := '';
end
else begin
ParsePath := Copy (PATHSTRING, 1, x-1);
PATHSTRING := Copy (PATHSTRING, x+1, Length (PATHSTRING));
end;
end;
end;
begin { ******************************************************************}
{ ******** MAIN PROGRAM ***********}
{ ******************************************************************}
DOCWORDCNT := 0; MISSPELLCNT := 0; ALTDIC := true;
if ParamCount = 0 then begin
write ('Source file : ');
readln (SRCNAME);
ALTDICNAME := '';
write ('Alternate dictionary : ');
readln (ALTDICNAME);
end
else begin
SRCNAME := ParamStr (1);
if ParamCount = 1 then ALTDICNAME := '' else ALTDICNAME := ParamStr (2);
end;
if ALTDICNAME = '' then ALTDIC := false;
assign (SRCFILE, SRCNAME);
SetTextBuf (SRCFILE, SBuf); {Turbo 4.0 setup for I/O buffers}
{$I-} reset (SRCFILE) {$I+};
if IOResult <> 0 then begin
writeln ('Unable to read the source file. Aborting SPELLER.');
exit;
end;
if ALTDIC then begin
assign (ALTDICFILE, ALTDICNAME);
SetTextBuf (ALTDICFILE, ABuf); {Turbo 4.0 setup for I/O buffers}
{$I-} reset (ALTDICFILE) {$I+};
if IOResult > 0 then begin
writeln ('Alternate dictionary not found.');
ALTDIC := false;
end;
end;
{ Find the dictionary file in the current directory on the default
drive or else go searching for it using the DOS PATH command to
find drives and directories to search. }
PATHSTRING := DosPath;
MATCH := false;
OPPATH := '';
PREFIX := '';
while MATCH = false do begin
OPNAME := OPPATH + PREFIX + 'SPELLER.LIS';
assign (DICFILE, OPNAME);
SetTextBuf (DICFILE, DBuf); {Turbo 4.0 setup for I/O buffers}
{$I-} reset (DICFILE) {$I+};
x := IOResult;
MATCH := (x=0);
OPPATH := ParsePath (PATHSTRING);
if OPPATH = '' then MATCH := true
else begin
if (Pos (':',OPPATH) = Length (OPPATH)) or
(Pos ('\',OPPATH) = Length (OPPATH)) then PREFIX := ''
else PREFIX := '\';
end;
end;
if x<>0 then begin {I/O error... file not found usually}
writeln;
writeln ('Unable to locate the spelling list. Aborting SPELLER.');
close (SRCFILE);
if ALTDIC then close (ALTDICFILE); {Don't close it if it isn't open}
exit;
end;
I := Pos ('.',SRCNAME);
if I = 0 then OUTNAME := SRCNAME + '.MIS'
else OUTNAME := Copy (SRCNAME, 1, I-1) + '.MIS';
assign (MISSFILE, OUTNAME);
{$I-} rewrite (MISSFILE) {$I+};
if IOResult <> 0 then begin
writeln;
writeln ('Unable to open the output file. Error code is ',x);
{DOS error code for 4.0 Turbo Pascal}
writeln ('Program terminating.');
close (SRCFILE);
close (DICFILE);
if ALTDIC then close (ALTDICFILE); {Don't close it if it isn't open}
exit;
end;
{ If no EXIT's were encountered in getting the files opened then we
continue here with the files all open. }
Writeln ('READING ',SRCNAME);
UNIQUECNT := 1;
New (WORDINDX [1]);
WORDINDX [2] := nil;
WORDINDX [1]^ := '~';
while not eof (SRCFILE) do begin
GETWORD (SRCFILE, A_WORD);
if Length (A_WORD) > 1 then begin {Don't spell check one letter words}
DOCWORDCNT := DOCWORDCNT + 1;
ADDUNIQUE (WORDINDX, A_WORD, UNIQUECNT);
end;
end;
Close (SRCFILE);
{*** Check against dictionary ***}
writeln ('CHECKING SPELLING');
I := 1;
A_WORD := ''; ALTWORD := '';
while I <= UNIQUECNT-1 do begin {dump the ~ at the end of the list}
while (A_WORD < WORDINDX [I]^) and not Eof (DICFILE) do
Readln (DICFILE, A_WORD);
if A_WORD <> WORDINDX [I]^ then begin
if ALTDIC then
while (ALTWORD < WORDINDX[I]^) and not Eof (ALTDICFILE) do
ReadLn (ALTDICFILE, ALTWORD);
if ALTWORD <> WORDINDX [I]^ then begin
Writeln (MISSFILE, WORDINDX [I]^);
MISSPELLCNT := MISSPELLCNT +1;
end;
end;
I := I + 1;
end { while I <= ... };
Close (DICFILE);
Write (MISSFILE, Chr (26));
Close (MISSFILE);
if ALTDIC then close (ALTDICFILE); {Don't close it if it aint open}
writeln;
writeln ('Speller done. Statistics:');
writeln (' Source file: ', SRCNAME);
writeln (' Total words: ', DOCWORDCNT);
writeln (' Unique words: ', UNIQUECNT);
writeln (' Spelling errors: ', MISSPELLCNT);
End.